import requests
from bs4 import BeautifulSoup
import pandas as pd


def get_html(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body (``response.text``). The HTTP status code
        is not checked, so error pages are returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout elapses.
    """
    print(url)
    # Present a desktop-browser User-Agent instead of the default
    # python-requests one.
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text


def parse_comment_count(text):
    """Convert a comment-count label such as ``'200万+'`` into an integer.

    A single trailing ``'+'`` is dropped. A trailing ``'万'`` multiplies the
    preceding number by 10,000; the number may be fractional (``'1.5万+'``).

    Args:
        text: The label string, or ``None``.

    Returns:
        The count as an ``int``. ``None``, empty and whitespace-only labels
        yield ``0``.

    Raises:
        ValueError: If the remaining text is not a number.
    """
    cleaned = (text or '').strip()
    if not cleaned:
        return 0
    if cleaned.endswith('+'):
        cleaned = cleaned[:-1]
    if cleaned.endswith('万'):
        # float() rather than string-appending '0000', so '1.5万' -> 15000.
        return int(round(float(cleaned[:-1]) * 10000))
    return int(cleaned)


if __name__ == "__main__":
    # Imported here so this script section is self-contained.
    import json

    keyword = 'vivo手机'
    url = 'https://search.jd.com/Search?keyword=' + keyword + '&enc=utf-8&suggest=2.his.0.0&wq=&pvid=52c7c3d1369344d1b0dff77b2befcb53'
    html = get_html(url)
    soup = BeautifulSoup(html, 'html.parser')
    good_list = soup.find_all('li', class_='gl-item')
    if not good_list:
        # Nothing to summarise; stop before requesting counts for zero SKUs.
        raise SystemExit('No product entries (li.gl-item) found in the search page.')

    skus = []
    names = []
    shops = []
    prices = []
    comment_labels = []
    records = []
    for li in good_list:
        no = li['data-sku']
        name = li.find(class_="p-name p-name-type-2").find('em').get_text()
        if '京品手机' in name:
            # Drop the first 6 characters; presumably the badge text plus
            # separator characters -- TODO confirm against the live markup.
            name = name[6:]
        # Keep product names short in the output.
        name = name[:16]
        shop = li.find(class_="p-shop").find('a').get_text()
        price = li.find(class_="p-price").find('i').get_text()
        records.append({'商品编号': no, '商品名称': name, '商品店铺': shop, '商品价格': price})
        skus.append(no)
        names.append(name)
        shops.append(shop)
        prices.append(price)

    summary_url = ("https://club.jd.com/comment/productCommentSummaries.action?referenceIds="
                   + ','.join(skus))
    print(summary_url)
    # Parse the response as JSON; never eval() text fetched from the network
    # (arbitrary code execution, and JSON true/false/null are not Python).
    payload = json.loads(get_html(summary_url))

    summaries = payload.get('CommentsCount') or []
    if len(summaries) != len(records):
        # The DataFrame below needs equal-length columns; fail with a clear
        # message instead of an obscure pandas error.
        raise SystemExit(
            'Expected %d comment summaries but received %d.' % (len(records), len(summaries)))

    # NOTE(review): pairs summaries with products by position, assuming the
    # endpoint answers in request order -- confirm.
    for record, summary in zip(records, summaries):
        label = summary.get('CommentCountStr')
        comment_labels.append(label)
        record['商品评价数'] = parse_comment_count(label)

    columns = {'编号': skus, '型号': names, '店铺名称': shops, '价格': prices, '评价数': comment_labels}
    print(records[0]['商品评价数'])

    df = pd.DataFrame(columns)
    df.to_csv('京东数据.csv')
